US Educational Inequalities

Duncan Gates

17 December, 2020

1 Tidycensus Capabilities

Check what variables are available from the 5 year American Community Survey in 2019.

all_2019_variables <- load_variables(2019, "acs5")

Obtain data such as median home value easily:

1.1 County level data

1.2 Using mapview

2 Getting Educational Data

2.1 The Actual Educational Data

Digging into 2016-2019 census data by using each one, this takes significant time on the census website learning the codes and API formatting.

get_historic_acs <- function(variables, 
                              geography, 
                              year, 
                              summary_var = NULL) {
y <- list()
for (i in 1:length(year)) {
  y[[i]] <- lapply(geography, function (x) {
      tidycensus::get_acs (geography = x, 
                           variables = variables, 
                           summary_var = summary_var, 
                           output = "tidy", 
                           year = year[i])}) %>%
      bind_rows() %>% 
      mutate(year = year[i]) } 
  y %>% bind_rows() } # Function to get all data instead of just most recent

year_range <- 2016:2019

geos_inc <- c("county")

ed_variable <- c('DP02_0059P', 
              'DP02_0060P', 
              'DP02_0061P', 
              'DP02_0062P', 
              'DP02_0063P', 
              'DP02_0064P', 
              'DP02_0065P')

ed_labels <- c('Less than 9th Grade', 
               '9th to 12th grade, no diploma', 
               'High school graduate', 
               'Some college, no degree', 
               "Associate's degree", 
               "Bachelor's degree", 
               'Grad/pro degree')

ed_level <- c(1:7)

ed_table <- as_tibble(bind_cols(ed_variable, ed_level, ed_labels), stringsAsFactors = FALSE)

ed_data <- get_historic_acs(variables = ed_variable, 
                            geography = geos_inc, 
                            year = 2018)
ed_data <- ed_data %>% 
  mutate(variable = case_when(variable == "DP02_0059P" ~ 'Less than 9th Grade',
                              variable == "DP02_0060P" ~ '9th to 12th grade, no diploma',
                              variable == "DP02_0061P" ~ 'High school graduate',
                              variable == "DP02_0062P" ~ 'Some college, no degree',
                              variable == "DP02_0063P" ~ 'Associate\'s degree',
                              variable == "DP02_0064P" ~ 'Bachelor\'s degree',
                              variable == "DP02_0065P" ~ 'Grad/pro degree'))

ed_data_2 <- get_acs(geography = "county",
                  variables = ed_variable,
                  year = 2018,
                  geometry = TRUE,
                  summary_var = "DP02_0059P",
                  shift_geo = T)
ed_data_2 <- ed_data_2 %>% 
  mutate(variable = case_when(variable == "DP02_0059P" ~ 'Less than 9th Grade',
                              variable == "DP02_0060P" ~ '9th to 12th grade, no diploma',
                              variable == "DP02_0061P" ~ 'High school graduate',
                              variable == "DP02_0062P" ~ 'Some college, no degree',
                              variable == "DP02_0063P" ~ 'Associate\'s degree',
                              variable == "DP02_0064P" ~ 'Bachelor\'s degree',
                              variable == "DP02_0065P" ~ 'Grad/pro degree'))

2.2 Secondary Data for Plotting and Comparison

variable <-c("DP02_0011P", 
             "DP02_0067P",
             "DP02_0069P",
             "DP02_0090P", 
             "DP02_0092P", 
             "DP02_0111P", 
             "DP03_0005P", 
             "DP03_0021P", 
             "DP03_0028P",
             "DP03_0088",
             "DP03_0096P",
             "DP03_0128P")

label <- c("%Householders living alone", 
           "%Bachelor's degree or higher", 
           "%Civilian veterans",
           "%Born different state", 
           "%Foreign born",
           "%Speak English only @ home", 
           "%Civilian LF - Unemployed",  
           "%Public trans to work", 
           "%Service occupations", 
           "$Per capita income", 
           "%Health insurance", 
           "%Below FPL - All people")
dp_table <- as.data.frame(cbind(variable, label))

dp_data <- get_historic_acs(variables=variable, 
                            geography = geos_inc, 
                            year = year_range) # REMINDER TO GGANIMATE LAST 5 YEARS

2.3 Another way to get race data

# race_variables <- c("B02001_001",
#                     "B02001_002",
#                     "B02001_003",
#                     "B02001_004",
#                     "B02001_005")
# 
# race_labels <- c("Total",
#                  "White",
#                  "Black",
#                  "American Indian",
#                  "Asian")
# 
# race_table <- as_tibble(bind_cols(race_variables, race_labels))
# 
# race_data <- get_historic_acs(variables = race_variables, 
#                             geography = geos_inc, 
#                             year = year_range)
# 
# race_data <- race_data %>% mutate()

# Easier way to do it
race_data2 <- get_acs(geography = "county",
                  variables = racevars,
                  year = 2018,
                  geometry = TRUE,
                  summary_var = "B02001_001",
                  shift_geo = T)
## Getting data from the 2014-2018 5-year ACS
## Using feature geometry obtained from the albersusa package
## Please note: Alaska and Hawaii are being shifted and are not to scale.

2.4 Joining the data

# ed_data <- ed_data %>%
#   left_join(ed_table)
ed_data_join <- ed_data %>% select(variable, GEOID, NAME, estimate) %>% rename(education_level = variable)
race_data_join <- race_data2 %>% mutate(estimate_percent = estimate/summary_est) %>% select(GEOID, NAME, variable, estimate_percent, geometry) %>% rename(race = variable)
test <- ed_data_join %>% left_join(race_data_join, by = c("GEOID", "NAME"))

2.5 A plot of educational attainment that should only be run on specific counties

2.6 Map of the racial data

race_data_join %>%
  mutate(Percent = 100 * (estimate_percent)) %>%
  ggplot(aes(fill = Percent, color = Percent)) +
  facet_wrap(~ race) +
  geom_sf() +
  scale_fill_viridis_c(direction = -1) +
  scale_color_viridis_c(direction = -1) +
  labs(title = "Racial geography of the US",
       caption = "Source: American Community Survey") +
  theme_void()

2.7 Map of the Educational Data

# just_geometry <- race_data_join %>% select(geometry, GEOID)
# ed_data_join <- ed_data_join %>% left_join(just_geometry, by = "GEOID")
ed_data_2 %>%
  ggplot(aes(fill = estimate, color = estimate)) +
  facet_wrap(~ factor(variable, levels = c('Less than 9th Grade', 
               '9th to 12th grade, no diploma', 
               'High school graduate', 
               'Some college, no degree', 
               "Associate's degree", 
               "Bachelor's degree", 
               'Grad/pro degree'))) +
  geom_sf() +
  scale_fill_viridis_c(direction = -1) +
  scale_color_viridis_c(direction = -1) +
  labs(title = "Educational Geography of the US",
       caption = "Source: American Community Survey",
       color = "Percent") +
  theme_void() + 
  theme(legend.position = "bottom",
        plot.title = element_text(hjust = 0.5))

3 Visualizing the Relationship between Race and Education

race_data_state <- get_acs(geography = "state",
                  variables = racevars,
                  year = 2018,
                  geometry = TRUE,
                  summary_var = "B02001_001",
                  shift_geo = T)
## Getting data from the 2014-2018 5-year ACS
## Using feature geometry obtained from the albersusa package
## Please note: Alaska and Hawaii are being shifted and are not to scale.
race_data_state <- race_data_state %>% as_tibble() %>% 
  select(-geometry, -GEOID, -summary_moe, -moe) %>% 
  rename(race = variable) %>% 
  mutate(race_percent = 100*(estimate/summary_est)) %>%
  select(-summary_est, -estimate)
ed_data_state <- get_acs(geography = "state",
                  variables = ed_variable,
                  year = 2018,
                  geometry = TRUE,
                  summary_var = "DP02_0059P",
                  shift_geo = T)
## Getting data from the 2014-2018 5-year ACS
## Using feature geometry obtained from the albersusa package
## Using the ACS Data Profile
## Please note: Alaska and Hawaii are being shifted and are not to scale.
ed_data_state <- ed_data_state %>% as_tibble() %>% 
  select(-geometry, -GEOID, -summary_moe, -moe) %>% 
  rename(education_level = variable) %>% 
  select(-summary_est) %>% 
  mutate(education_level = case_when(education_level == "DP02_0059P" ~ 'Less than 9th Grade',
                              education_level == "DP02_0060P" ~ '9th to 12th grade, no diploma',
                              education_level == "DP02_0061P" ~ 'High school graduate',
                              education_level == "DP02_0062P" ~ 'Some college, no degree',
                              education_level == "DP02_0063P" ~ 'Associate\'s degree',
                              education_level == "DP02_0064P" ~ 'Bachelor\'s degree',
                              education_level == "DP02_0065P" ~ 'Grad/pro degree'))


race_education_state <- ed_data_state %>% left_join(race_data_state, by = c("NAME")) %>% rename(education_percent = estimate, state = NAME)

# Plot to go here

3.1 Chord Diagram (Not Yet Finished)

library(chorddiag)
matrix_race <- race_data_join %>% as_tibble() %>% select(-geometry, -GEOID) %>% mutate(estimate_percent = 100 * estimate_percent)
matrix_ed <- ed_data_2 %>% 
  as_tibble() %>%
  select(-geometry, -GEOID, -summary_est, -summary_moe, -moe) %>% 
  pivot_wider(names_from = NAME, values_from = estimate)
matrix_ed_race <- matrix_ed %>%
  inner_join(matrix_race, by = c(""))

m <- matrix(c(11975,  5871, 8916, 2868,
              1951, 10048, 2060, 6171,
              8010, 16145, 8090, 8045,
              1013,   990,  940, 6907),
            byrow = TRUE,
            nrow = 4, ncol = 4)

races <- c("White", "Black", "Asian", "Hispanic")
dimnames(m) <- list(have = races,
                    prefer = races)
groupColors <- c("#000000", "#FFDD89", "#957244", "#F26223")

# Build the chord diagram:
p <- chorddiag(m, groupColors = groupColors, groupnamePadding = 20)
p
 

A work by Duncan Gates

gatesdu@oregonstate.edu